'''
* This is the project for Brtc LlmOps Platform
* @Author Leon-liao <liaosiliang@alltman.com>
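* @Description Study example: split a Python source file into chunks with a language-aware RecursiveCharacterTextSplitter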
* @File: 12_study_use_recursive_spiltter_with_code.py
* @Time: 2025/10/29
* @All Rights Reserved By Brtc
'''
from langchain_community.document_loaders import  UnstructuredFileLoader

from langchain_text_splitters import RecursiveCharacterTextSplitter, Language

# Load the sample Python source file as LangChain documents.
# NOTE: the path is relative to the current working directory, so run this
# script from the directory that contains the target file.
loader = UnstructuredFileLoader("./6_self_define_document.py")

# Build a recursive splitter configured for Python source code.
# add_start_index=True asks the splitter to record each chunk's start offset
# in the chunk metadata, which is printed below.
text_splitter = RecursiveCharacterTextSplitter.from_language(
    Language.PYTHON,
    chunk_size=500,
    chunk_overlap=50,
    add_start_index=True,
)

docs = loader.load()
chunks = text_splitter.split_documents(docs)

# Report the size and metadata of every chunk.
for chunk in chunks:
    print(f"块大小：{len(chunk.page_content)}, 元数据：{chunk.metadata}")

# Show the first chunk's content as a sample. Guarded because an empty input
# produces no chunks, and indexing chunks[0] would raise IndexError.
if chunks:
    print(chunks[0].page_content)
